library(tidyverse)
library(lubridate)
library(maps)
library(viridis)
library(plotly)
library(dplyr)
# Daily report snapshots ----
# Two single-day reports are downloaded from the JHU CSSE repository.
# Only the 2020-03-11 file has its slash-separated column names rewritten to
# the underscore form; the 2020-03-23 file is read unchanged (presumably it
# already uses the underscore names -- confirm against the file).
report_03_11_2020 <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-11-2020.csv")),
  Province_State = "Province/State",
  Country_Region = "Country/Region"
)

report_03_23_2020 <-
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-23-2020.csv") %>%
  read_csv()
# Time series: confirmed cases and deaths ----
# Each wide table (one column per date) is reshaped to long form and given a
# composite `Key` of province, country and date joined with ".", which is the
# column the two tables are merged on.

# Confirmed cases: wide table as downloaded, with underscore column names.
time_series_confirmed <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")),
  Province_State = "Province/State",
  Country_Region = "Country/Region"
)

# Long form; `remove = FALSE` keeps the province/country/date columns next to
# the new `Key`, so this table supplies the identifying columns after the join.
time_series_confirmed_long <- time_series_confirmed %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Confirmed"
  ) %>%
  unite(Key, Province_State, Country_Region, Date, sep = ".", remove = FALSE)

# Deaths: same treatment as the confirmed table.
time_series_deaths <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv")),
  Province_State = "Province/State",
  Country_Region = "Country/Region"
)

# Only `Key` and `Deaths` are kept so the join adds a single new column.
time_series_deaths_long <- time_series_deaths %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Deaths"
  ) %>%
  unite(Key, Province_State, Country_Region, Date, sep = ".") %>%
  select(Key, Deaths)

# A full join keeps keys that appear in either table.
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key")
# Time series: recovered cases, final join and long "counts" table ----

# Recovered: wide table as downloaded, with underscore column names.
time_series_recovered <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv")),
  Province_State = "Province/State",
  Country_Region = "Country/Region"
)

# Long form reduced to the composite key plus the Recovered value.
time_series_recovered_long <- time_series_recovered %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long),
    names_to = "Date",
    values_to = "Recovered"
  ) %>%
  unite(Key, Province_State, Country_Region, Date, sep = ".") %>%
  select(Key, Recovered)

# Attach Recovered to the confirmed/deaths table, drop the helper key and
# parse the month/day/year date strings into Date values.
# Note: this is a full join and the recovered table carries only `Key`, so any
# key that exists only in the recovered file ends up with NA in the
# province/country/date columns once `Key` is removed.
time_series_long_joined <- time_series_long_joined %>%
  full_join(time_series_recovered_long, by = "Key") %>%
  select(-Key) %>%
  mutate(Date = mdy(Date))

# One row per location, date and report type (Confirmed/Deaths/Recovered).
time_series_long_joined_counts <- time_series_long_joined %>%
  pivot_longer(
    cols = -c(Province_State, Country_Region, Lat, Long, Date),
    names_to = "Report_Type",
    values_to = "Counts"
  )
# Exercise part 1 ----
# Interactive time series of log2 counts for four countries, one facet row per
# country and one coloured series per report type.
#
# Notes on the pipeline:
# * Rows are filtered before aggregating; both filters act on grouping
#   columns, so the result equals filtering afterwards but less data is summed.
# * `na.rm = TRUE` keeps a single missing province value (the joins upstream
#   are full joins) from turning a whole country total into NA.
# * `.groups = "drop"` returns an ungrouped tibble instead of leaving residual
#   grouping on the summarised data.
# * The report type is mapped to `color`: `fill` has no visible effect on the
#   default point shape or on lines, so every series was drawn in black.
# * Counts of 0 become -Inf under log2() and are dropped by ggplot2 with a
#   warning.
joined_interactive <- time_series_long_joined_counts %>%
  filter(
    Country_Region %in% c("US", "China", "Italy", "Japan"),
    Report_Type %in% c("Confirmed", "Deaths", "Recovered")
  ) %>%
  group_by(Country_Region, Report_Type, Date) %>%
  summarise(Counts = sum(Counts, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = Date, y = log2(Counts), color = Report_Type)) +
  geom_point() +
  geom_line() +
  ggtitle("Worldwide COVID-19 Cases") +
  facet_grid(Country_Region ~ .)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(joined_interactive)
# Exercise part 2 ----
# Case fatality rate (deaths / confirmed cases) for four countries.
#
# The time series values are running totals, so adding them up over every date
# counts each case many times. The rate is therefore computed from the most
# recent date only, summing over a country's provinces on that date.
case_fatality_by_country <- time_series_long_joined %>%
  filter(Country_Region %in% c("US", "China", "Italy", "Japan")) %>%
  # na.rm: rows that came only from the recovered file have no Date.
  filter(Date == max(Date, na.rm = TRUE)) %>%
  group_by(Country_Region) %>%
  summarise(
    Deaths = sum(Deaths, na.rm = TRUE),
    Confirmed = sum(Confirmed, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(Case_Fatality_Rate = Deaths / Confirmed) %>%
  arrange(desc(Case_Fatality_Rate)) %>%
  # Row order alone does not order a character x-axis; the factor levels do.
  mutate(
    Country_Region = fct_reorder(
      Country_Region, Case_Fatality_Rate, .desc = TRUE
    )
  )

# Bar chart only: a line layer over a discrete axis with one row per country
# has nothing to connect and draws nothing.
# The name is kept (and still holds the ggplot object) because later code
# passes it to ggplotly().
time_series_casefatality_long <- case_fatality_by_country %>%
  ggplot(aes(x = Country_Region, y = Case_Fatality_Rate)) +
  geom_col() +
  ggtitle("Worldwide COVID-19 Case Fatality Rate")

# Inspect the summary table rather than the internals of the plot object.
head(case_fatality_by_country)
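## Pasted console output from an earlier run of this script; the figures
## reflect the data as downloaded at that time.
## $data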
## # A tibble: 4 x 4
## Country_Region Deaths Confimed Case_Fatality_Rate
## <chr> <dbl> <dbl> <dbl>
## 1 Italy 934608 7406494 0.126
## 2 US 1453268 27613580 0.0526
## 3 China 291906 7190724 0.0406
## 4 Japan 9970 371910 0.0268
##
## $layers
## $layers[[1]]
## geom_col: width = NULL, na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_stack
##
## $layers[[2]]
## geom_line: na.rm = FALSE
## stat_identity: na.rm = FALSE
## position_identity
##
##
## $scales
## <ggproto object: Class ScalesList, gg>
## add: function
## clone: function
## find: function
## get_scales: function
## has_scale: function
## input: function
## n: function
## non_position_scales: function
## scales: list
## super: <ggproto object: Class ScalesList, gg>
##
## $mapping
## Aesthetic mapping:
## * `x` -> `Country_Region`
## * `y` -> `Case_Fatality_Rate`
##
## $theme
## list()
##
## $coordinates
## <ggproto object: Class CoordCartesian, Coord, gg>
## aspect: function
## backtransform_range: function
## clip: on
## default: TRUE
## distance: function
## expand: TRUE
## is_free: function
## is_linear: function
## labels: function
## limits: list
## modify_scales: function
## range: function
## render_axis_h: function
## render_axis_v: function
## render_bg: function
## render_fg: function
## setup_data: function
## setup_layout: function
## setup_panel_params: function
## setup_params: function
## transform: function
## super: <ggproto object: Class CoordCartesian, Coord, gg>
# Show the case-fatality ggplot as an interactive plotly widget.
time_series_casefatality_long %>%
  ggplotly()